# ======================================================================================= #
# Lebo/Kraft 2017. "The General Error Correction Model in Practice" (Research & Politics) #
# Code to reproduce all analyses, simulations, and plots                                  #
# ======================================================================================= #

## Set working directory
#setwd("SET WORKING DIRECTORY HERE")

## Install and load all required packages
pkg <- c("foreign","tidyverse","fracdiff","tseries","gridExtra")
inst <- pkg %in% installed.packages()
if(length(pkg[!inst]) > 0) install.packages(pkg[!inst])
invisible(lapply(pkg, library, character.only = TRUE))
rm(list = ls())

## Load custom functions
source("func.R")


### Figure 1(a) - 1(c)
## Examine false negatives on ADF test and 
## false positives on alpha*_1 for unrelated series

## Simulate time series
set.seed(20160728)
res <- rbind(simDfAlpha(nt=50), simDfAlpha(nt=100), simDfAlpha(nt=250)
             , simDfAlpha(nt=50, par="rho"), simDfAlpha(nt=100, par="rho")
             , simDfAlpha(nt=250, par="rho"))
res$par <- factor(res$par, levels = c("rho","d"), labels=c("rho","italic(d)"))

## Compute 95% confidence intervals
qlo <- function(x) quantile(x, .025)
qhi <- function(x) quantile(x, .975)

## Compute MacKinnon critical values
res$mk_crit <- macKinnon(as.numeric(as.character(res$nt)))

## False positives (alpha1_t < mk_crit given that DF fails to reject)
res$false_positive <- with(res, (alpha1_t < mk_crit) * (dfval >= -2.88))

## Summarize parameter estimates
res2 <- res %>% group_by(nt, par, val) %>% summarise_each(funs(mean,qlo,qhi))

## Figure 1(a)
ggplot(res2[res2$par=="rho",], aes(x=alpha1_mean, y=dfval_mean)) + 
  geom_point(aes(size=val, shape=nt)) + geom_hline(yintercept=-2.88) + 
  geom_errorbar(aes(ymin = dfval_qlo, ymax = dfval_qhi)) + 
  theme_classic() + theme(panel.border = element_rect(fill=NA)) + 
  xlab("Mean value of ECM parameter") +
  ylab("Mean value and 95% intervals for DF test statistic") +
  scale_shape_discrete(name="Number of\nTime Points") +
  scale_size_continuous(name = expression(paste("\u03C1", " Value"))
                        , breaks = seq(0,.9,.1))
ggsave("fig1a.png",height=5,width=7, dpi = 300)

## Figure 1(b)
ggplot(res2[res2$par=="italic(d)",], aes(x=alpha1_mean, y=dfval_mean)) + 
  geom_point(aes(size=val, shape=nt)) + geom_hline(yintercept=-2.88) + 
  geom_errorbar(aes(ymin = dfval_qlo, ymax = dfval_qhi)) + 
  theme_classic() + theme(panel.border = element_rect(fill=NA)) + 
  xlab("Mean value of ECM parameter") +
  ylab("Mean value and 95% intervals for DF test statistic") +
  scale_shape_discrete(name="Number of\nTime Points") +
  scale_size_continuous(name = expression(paste(italic("d"), " Value"))
                        , breaks = seq(0,.9,.1)) +
  guides(shape = guide_legend(order=1), size = guide_legend(order=2))
ggsave("fig1b.png",height=5,width=7, dpi = 300)

## Figure 1(c)
res %>% group_by(par, val, nt) %>% 
  summarize(mean = mean(false_positive)) %>%
  ggplot(aes(y=val, x=nt, fill = mean)) + 
  geom_tile() + geom_text(aes(label=mean)) +
  facet_wrap(~par, labeller = label_parsed) +
  theme_classic() + theme(panel.border = element_rect(fill=NA)) +
  xlab("Number of Time Points") +
  ylab(expression(paste("Value of \u03C1 / ", italic("d")))) +
  scale_fill_gradient2(limits=c(0,.5), low="white", high="grey40"
                       ,name="Proportion of\nFalse Positives") +
ggsave("fig1c.png",height=5,width=7, dpi = 300)

## Compute additional information about simulations mentioned in text
res %>% filter(nt==50) %>% group_by(par, val, nt) %>% 
  summarize(mean_alpha = mean(alpha1), df_falseneg=mean(dfval>=-2.93))

## Percent of false negatives in ADF with white noise
mean(replicate(1000,adf.test(rnorm(50))$statistic)>=-2.93)


### Figure 2: Stock and Watson’s (2011) cointegration example:
## Three-month and one-year T-bill rates, error correction rate = 52%

## Load data
stock <- read.dta("stock2011.dta")
stock$date <- as.numeric(gsub(":.+","",stock$datxx)) + seq(0,.75,length.out=4)

## Create figure
png("fig2.png", height = 5, width = 7, unit = "in", res = 300)
par(mar = c(3,2,1,1),mgp=c(1,0,0),cex=1,tck=.01)
plot(fygt1~date, type="l", ylim=c(-2,20)
     , xlab="Year", ylab="Interest Rate", data=stock)
lines(fygm3~date, lty=2, data=stock)
legend("bottomleft", bty="n", lty=c(1,2)
       , legend=c("One Year Treasury Bill Rate","3-Month Treasury Bill Rate"))
dev.off()


### Figure 3: Kelly and Enns' (2010) data in Table 1 Model 4; error correction rate = 55%

## Load data
kelly2010 <- read.dta("kelly2010.dta")
kelly2010reduced <- kelly2010[kelly2010$year>1972,]

## Create figure
png("fig3.png", height = 5, width = 7, unit = "in", res = 300)
par(mar = c(3,2,1,2),mgp=c(1,0,0),cex=1,tck=.01)
plot(welfare~year, type="l", xlim=c(1970,2010), ylim=c(-25,45)
     , xlab="Year", ylab="Welfare Attitudes / Policy Liberalism"
     , data=kelly2010reduced)
lines(policy~year, lty=2, data=kelly2010reduced)
par(new = T)
plot(gini~year, type = "l", xlim=c(1970,2010), ylim=c(.3,.5)
     , lty = 3, axes = F, xlab=NA, ylab=NA, data = kelly2010reduced)
axis(side = 4)
mtext(side = 4, line = 1, 'Gini Index',cex=1)
legend("bottomleft", bty="n", lty=c(1,2,3)
       , legend=c("Welfare Attitudes","Policy Liberalism","Gini Index"))
dev.off()


### Figure 4: Kelly and Enns' (2010) data for Table 1 Model 2

## Create figure
png("fig4.png", height = 5, width = 7, unit = "in", res = 300)
par(mar = c(3,2,1,2),mgp=c(1,0,0),cex=1,tck=.01)
plot(mood~year, type="l", xlim=c(1950,2010), ylim=c(-40,80)
     , xlab="Year", ylab="Public Mood Liberalism / Policy Liberalism"
     , data=kelly2010)
lines(policy~year, lty=2, data=kelly2010)
par(new = T)
plot(gini~year, type = "l", xlim=c(1950,2010), ylim=c(.3,.5)
     , lty = 3, axes = F, xlab=NA, ylab=NA, data = kelly2010)
axis(side = 4)
mtext(side = 4, line = 1, 'Gini Index',cex=1)
legend("bottomleft", bty="n"
       , legend=c("Public Mood Liberalism","Policy Liberalism","Gini Index")
       , lty=c(1,2,3))
dev.off()


### Figure 5: Casillas et al.’s (2011) data for Table 1, error correction rate = 83% 

## Load data
casillas <- read.dta("casillas2011.dta")

## Create figure
png("fig5.png", height = 5, width = 7, unit = "in", res = 300)
par(mar = c(1,3,1,3),mgp=c(1,0,0),cex=1,tck=.01)
plot(all_rev~term, type ="l", ylim=c(0,100), col = "red3"
     , xlab=NA, ylab="% Liberal Supreme Court Decisions", data = casillas)
par(new = T)
plot(mood~term, type = "l", ylim=c(35,75), col = "blue3"
     , lty = 2, axes = F, xlab=NA, ylab=NA, data = casillas)
axis(side = 4)
mtext(side = 4, line = 1, 'Public Mood',cex=1)
legend("bottomright", bty="n"
       , legend=c("Liberal Supreme Court Decisions (all reversals)","Public Mood")
       , lty=c(1,2), col=c("red","blue"))
dev.off()


### Figure 6: simulate ECM for unrelated series, compare to Casillas et al. 2011

## ECM estimates for Casillas et al. 2011 (bivariate specification)
m6_all <- gecmEst(casillas$all_rev, casillas$mood)
m6_nosal <- gecmEst(casillas$nosal_rev, casillas$mood)
m6_sal <- gecmEst(casillas$sal_rev, casillas$mood)

## Simulate estimates
set.seed(20160728)
res6 <- simDfAlpha(nsim=25,nstep=100,nt=60)

## Add Casillas info
seg <- data.frame(model = c("All reviews","Non-salient reviews", "Salient reviews")
                  , val = c(0.63,0.65,0.30) # Grant/Lebo 2016 (PA, p.20), RATS estimation
                  , alpha1 = c(coef(m6_all)["iv_ly"],coef(m6_nosal)["iv_ly"],coef(m6_sal)["iv_ly"])
                  , alpha1_se = c(sqrt(diag(vcov(m6_all))["iv_ly"]), sqrt(diag(vcov(m6_nosal))["iv_ly"])
                                  , sqrt(diag(vcov(m6_sal))["iv_ly"]))
                  , xpos = 0.75)
seg$alpha1_t <- seg$alpha1 / seg$alpha1_se

## Create figure
p1 <- ggplot(res6, aes(x=val, y=alpha1_t)) + geom_point(alpha=.2,size=.5) + 
  xlim(0,1) + theme_classic() + ggtitle("Significance of ECM") + ylab("T-Statistic") +
  xlab(expression(paste("Level of Integration (",italic("d"), ")"))) +
  geom_segment(aes(x=0,xend=val,y=alpha1_t,yend=alpha1_t),data=seg) +
  geom_segment(aes(x=val,xend=val,y=-11,yend=alpha1_t),data=seg) +
  geom_segment(aes(x=val,xend=xpos,y=alpha1_t,yend=alpha1_t-1),linetype=3,data=seg) +
  geom_text(aes(x=xpos,y=alpha1_t-1,label=model)
            , nudge_x=.01, hjust="outward", size=2, data=seg) +
  geom_point(col="red",data=seg)

p2 <- ggplot(res6, aes(x=val, y=alpha1)) + geom_point(alpha=.2,size=.5) + 
  xlim(0,1) + theme_classic() + geom_hline(yintercept=-1, linetype=2) +
  ggtitle("Coefficient of ECM") + ylab(expression(paste(hat(alpha)[1],"*"))) +
  xlab(expression(paste("Level of Integration (",italic("d"), ")"))) +
  geom_segment(aes(x=0,xend=val,y=alpha1,yend=alpha1),data=seg) +
  geom_segment(aes(x=val,xend=val,y=-1.3,yend=alpha1),data=seg) +
  geom_segment(aes(x=val,xend=xpos,y=alpha1,yend=alpha1-.1),linetype=3,data=seg) +
  geom_text(aes(x=xpos,y=alpha1-.1,label=model)
            , nudge_x=.01, hjust="outward", size=2, data=seg) +
  geom_point(col="red",data=seg)

png("fig6.png",height=5,width=7,unit="in",res=300)
grid.arrange(p1,p2,ncol=2,widths=c(.48,.52))
dev.off()
